##################################
######## EEID R Basics ###########
### 2009 Workshop Exercises ######
########## Stu Field #############
##################################
rm(list = ls())
##################
### Packages #####
##################
require(popbio)
require(gplots) # gdata & gtools must be installed (but not necessarily loaded)
require(gdata)
require(gtools)
require(lattice)
require(odesolve)
require(exactRankTests)
########################
#############################
### Importing data into R ###
##############################
mydata <- read.csv('Tree data.csv', sep=',', header=T) # 'sep' & 'header' are optional; (defaults)

### OR make 1st column the row header (row # = tree #)
mydata <- read.csv('Tree data.csv', sep=',', header=T, row.names= 1)

### OR you can go directly from an .xls file with gdata installed
mydata2 <- read.xls('filename.xls', sheet=1, perl= 'perl')
popdata = read.csv('Pop6C.csv')
 
names(mydata)
attributes(mydata)
sapply(mydata, class); ll(mydata, dim=T) # uses the 'gdata' package
attach(mydata)
summary(mydata)


########################
### Checking for NAs ###
########################
complete.cases(mydata)
any(complete.cases(mydata) == FALSE)
which(!complete.cases(mydata))
mydata <- na.omit(mydata)

################
### Sorting ####
###### & #######
## Organizing ##
################
mydata[order(mydata$dbh), 1:ncol(mydata)]
mydata[rev(order(mydata$dbh)), ]
mydata[order(mydata$spp, -mydata$NobarkArea), ]
tree1 = mydata[rev(order(mydata$SapDepth)), c(1,6,10)]
# OR
tree2 = subset(mydata, select=c(tree,SapDepth,SapArea)) # two steps to reorder
tree2 = tree2[rev(order(tree2$SapDepth)),]

Winter = mydata[mydata$season == 'Winter', ]
# OR
subset(mydata, season == 'Winter')

mydata[mydata$SapArea <= 100 & mydata$SapDepth >= 2, ]
# OR
subset(mydata, SapArea <= 100 & SapDepth >=2, select= c(dbh, SapDepth, SapArea)) # choose columns
subset(mydata, SapArea <= 100 & SapDepth >=2, select= dbh:SapArea) # dby -> SapArea
subset(mydata, SapArea <= 100 & SapDepth >=2, select= -BarkThick) # excludes BarkThick

#########################
#### Vector exercises ###
#########################
vec1 = c(2,5,8,2,1,9,4,7,5,6)
vec2 = rep(c(2,5,9,8), c(3,2,2,4))
vec3 = seq(0,6.5, by=0.5); length(vec3)
all.vecs = c(vec1,vec2,vec3)
mass = runif(10, 10, 75)
mass.d = mass/365
mass.r = round(mass,2)
x.mass = mean(mass.d)
sort(mass.r); sort(mass.r, decreasing=TRUE)
ex.10 = mean(mass.d)*sum(mass.d)/median(mass.d)
stats.v = c(min(mass.r), max(mass.r), mean(mass.r), sum(mass.r), median(mass.r), range(mass.r))

##################################
### Calculations using vectors ###
##################################
LA = 0.1*(dbh^BarkThick) + sqrt(TotalNobarkArea/SapArea)

##################
### The Matrix ###
##################
Mat <- matrix(seq(16,-16, by= -4),3,3, byrow= TRUE)

A <- matrix(c(2, 3, 7, 8.8, 11, 
			  3, 4.3, 8, 9, 12,
			  8, 16, 0.1, 5, 9, 
			  5, 0.4, 9, 1.7, 3,
			  0.7, 6, 5.9, 4, 7), 5, 5, byrow= T)

Sum = rowSums(A)
B <- cbind(A, Sum)
B <- as.matrix(rbind(B, colSums(B)))

pars = c(0.3, 0.1, 0, 1, 0.4, 0.3, 3, 0, 0.8); ipop = c(1,2,4)
M <- matrix(pars, 3, 3); M
x.2 = M %*% ipop

####################
####### Loops ######
####################
### The 'For' Loop
Popn = c(1, rep(0,9))
Gen = length(Popn)
for (n in 2:Gen){
	Popn[n]= Popn[n-1] * 2
	}
Popn

######################
### The 'while' Loop
######################
pop.vec = pop.now = 1; count = 1
while (pop.now <= 25000) {
	pop.now = pop.now * 2
	pop.vec = c(pop.vec, pop.now) 
	count = count + 1
	}
pop.vec; count
plot(1:count, pop.vec, 'b', lwd=1.5)

######################
### Fibonacci Exercise
######################
Fib.v = c(0,1,rep(0,14))
for (n in 3:length(Fib.v)){
	Fib.v[n] = Fib.v[n-2] + Fib.v[n-1]
}
Fib.v

Fibonacci1 <- matrix(Fib.v, 4,4)
Fibonacci2 <- matrix(sample(Fib.v, length(Fib.v), replace=FALSE), 4, 4)

###############################
### Create Projection Matrix 
### (nested for-loop) Exercise
################################
PMat <- matrix(0, 5, 5)
for (c in 1:ncol(PMat)){
	PMat[1,c]= c-1
		for (r in 2:nrow(PMat)){
			PMat[r,r-1]= PMat[1,r] / 10
	}}
PMat

####################################
### Population Projection Exercise
####################################
Gen = 20
StoreMat <- matrix(0, Gen, 3)
pars = c(0.3, 0.1, 0, 1, 0.4, 0.3, 3, 0, 0.8); ipop = c(1,2,4)
M <- matrix(pars, 3, 3) ### Projection matrix from above
StoreMat[1,]= ipop      ### place 1st row as initial population (1,2,4)

### Now the loop ###
for (n in 2:Gen){
	StoreMat[n,]= M %*% StoreMat[n-1,]
}
StoreMat # see what you got

### This will produce the plots of the projections
plot(1:Gen, StoreMat[,1], 'l', col='black', main='Three Class Popn Example', ylab='No. of Individuals', xlab='No. of generations', lwd=2)
lines(1:Gen, StoreMat[,2], 'l', col='dark blue', lwd=2)
lines(1:Gen, StoreMat[,3], 'l', col='dark red', lwd=2)
legend('topleft', legend=c('Class 1','Class 2','Class 3'), lty=1, col=c('black', 'dark blue', 'dark red'), lwd=2, bg='gray95')

### OR; take a quick look at the projection using matplot()
matplot(1:Gen, StoreMat, 'l')

######################
#### Plotting in R ###
######################
### par() sets defaults for future plotting
par(bg='lightblue', mfrow=c(1,3)) # bg colours the background of graphs
par(mfrow=c(1,3)) # create a 1 x 3 array of figures and fill row-wise

#### Fig. 1 below (histogram, boxplot, & scatterplot)#####
#################
### Histogram ### of Sapwood Depth from Tree data.csv
#################
hist(mydata$SapDepth, xlab= 'Sapwood Depth', main='Histogram of Sapwood Depth', col= 'gray50')

###############
### Boxplot ###
###############
boxplot(SapDepth ~ spp, data= mydata, ylab= 'SapDepth', col= 'darkslateblue')

# Or by two sorting factors (season & infection status)
boxplot(SapDepth ~ Infected + season, data= mydata, ylab= 'SapDepth', col= c('darkgreen', 'darkred'))

####################
### Scatter plot ###
####################
plot(mydata$dbh, mydata$Heartwood, pch= 17, col= 'darkred', ylab= 'Area (cm^2)', xlab= 'DBH (cm)', main= 'DBH vs. 2 tree characteristics')
points(mydata$dbh, mydata$SapArea, pch= 19, col= 'darkgreen')
legend('topleft', legend=c('Heardwood', 'Sapwood'), pch= c(17,19), col =c('darkred', 'darkgreen'), bg= 'gray95')

####################
### Symbols plot ###
####################
ottar <- read.csv('habsel.csv'); attach(ottar)
symbols(x, y, circles = captures, inches = 0.125, bg = 'darkblue', fg = 'red')

##############
### xyplot ### for this you need the 'lattice' package
##############
require(lattice)
### Used for Fig. 2 #######
xyplot(SapDepth ~ dbh | spp, data= mydata, xlab='DBH (cm)', ylab='Sapwood Depth', pch= 19, col= 'navy', rows = 2, as.table= TRUE)

xyplot(SapDepth ~ dbh | spp*Infected, data= mydata, xlab='DBH (cm)', ylab='Sapwood Depth', pch= 19, col= 'navy', rows = 2, as.table= TRUE)

################
### Coplot #####
################
### Works perhaps for multivariate data where separating variable is continuous
coplot(SapDepth ~ dbh | spp, data= mydata, xlab='DBH (cm)', ylab='Sapwood Depth', pch=19, col='navy', rows= 2, panel= panel.smooth)

coplot(SapDepth ~ dbh | Heartwood, data= mydata, xlab='DBH (cm)', ylab='Sapwood Depth', pch=19, col='navy', rows= 2, panel= panel.smooth)

###########################
####### The Barplot #######
#### Area without Bark ####
### by spp. & season ######
###########################
dbh.sd.season = tapply(mydata$dbh, mydata$season, sd) ### sd of dbh grouped by season
### Make into vectors for Barplot; NOTE ALPHABETICAL ORDER!!!!
Nobark.season = as.vector(tapply(mydata$NobarkArea, mydata$season, mean))
Nobark.spp = as.vector(tapply(mydata$NobarkArea, mydata$spp, mean))

x.axis1 = levels(mydata$season)    ### levels() will put them alphabetically
x.axis2 = levels(mydata$spp)       ### same thing but now by species

### Make the barplot ###
barplot(Nobark.season, names= x.axis1, ylim= c(0, max(Nobark.season)), col =c('blue','green'), density= c(15,10), angle= c(45,135))
barplot(Nobark.spp, names= x.axis2, ylim= c(0, max(Nobark.spp)))

######################
## Create fake data
## for plotting Ex.     par(mfrow=c(1,2))
## with Uggs
######################
t = 1:30
set.seed(125)
infected = jitter(0.4/(1+exp(0.4*(15-t))), amount= 0.035) # AMOUNT DETERMINES HOW MUCH JITTERING
plot(t, infected, ylim=c(0,0.5), main='Proportion Infected by Uggs with Shorts', xlab='Months since Lauren first did it on The Hills', ylab='Proportion Infected')
grid()

#########################
### Random numbers &
### Distributions 
#########################
par(mfrow=c(2,2))
curve(dnorm, -4, 4, xlab= 'z', ylab= 'Probability Density', main= 'Density', col='darkgreen', lwd=2)
curve(pnorm, -4, 4, xlab= 'z', ylab= 'Probability', main= 'Probability', col='darkgreen', lwd=2)
curve(qnorm, 0, 1, xlab= 'p', ylab= 'Quantiles (z)', main= 'Quantiles', col='darkgreen', lwd=2)
hist(rnorm(1000), xlim= c(-4,4), xlab= 'z', ylab= 'Frequency', main= 'Random Numbers', col='darkgreen')

set.seed(666)
nD = rnorm(1000, 400, 25); L= length(nD)
ks.test(nD, pnorm); hist(nD, col='gray88', main='', xlab='', ylab= 'Frequency')
lines(seq(0,L,1), 10*L*(dnorm(seq(0,L,1), mean(nD), sd(nD))), col='navy', lwd=2)

SapDepth = mydata$SapDepth; L = length(SapDepth)
ks.test(SapDepth, pnorm); hist(SapDepth, col='gray88', main='', xlab='Sapwood Depth', ylab= 'Frequency')
lines(seq(0,L,0.1), L*(dnorm(seq(0,L,0.1), mean(SapDepth), sd(SapDepth))), col='navy', lwd=2)

set.seed(1001)
bD = rbinom(1000, 400, 0.33); L=length(bD)
ks.test(bD, pbinom);
hist(bD, col='gray88', main='', xlab='bD', ylab='Frequency')
lines(min(bD):max(bD), 5*L*(dbinom(min(bD):max(bD), 400, 0.33)), col='navy', lwd=2)

set.seed(135)
nbD = rnbinom(1000, 400, 0.33); L=length(nbD)
ks.test(nbD, pnbinom);
hist(nbD, col='gray88', main='', xlab='nbD', ylab='Frequency')
lines(min(nbD):max(nbD), 50*L*(dnbinom(min(nbD):max(nbD), 400, 0.33)), col='navy', lwd=2)

###########################
## Plotting Equations #####
## using curve() function #
###########################
curve(1/(1+x^2), from=-10, to= 10)
curve(2*x^3 - 8*(x^2) + 2*x + 6, from= -10, to= 10)
curve(0.4/(1+exp(0.4*(15-x))), from=0, to= 30, col= 'darkgreen', lwd= 2, add= T) # for the Uggs data.

#######################
## Using matplot() ####
#######################
popdata <- read.csv("Pop6C.csv"); popdata
matplot(1:20, popdata, 'l', xlab= 'Generations', ylab= 'Number of Individuals per Class', main='Population Projection for 6 Classes', lwd= 1.5); grid()
legend('topleft', legend=c('Class 1','Class 2','Class 3','Class 4','Class 5','Class 6'), lty=c(1:5), col=c(1:6), bg='gray95')




###########
#####################
## Creating your ####
## OWN Functions ####
#####################
###########
rMat <- function(m, n, min, max, dec=0) {
	r <- runif(m*n, min, max)
	A <- matrix(r, m, n)
	round(A, dec)
}

CI95 <- function(x){
	mean <- mean(x)
	sd <- sd(x)
	n <- length(x)
	se <- sd/sqrt(n)
	CI.vec <- c((mean-(1.96*se)), mean, (mean+(1.96*se)))
	CI.vec
}

### Alternatively

CI95n <- function(x, p=95){
	P <- p/100 + (1-(p/100))/2
	mean <- mean(x)
	sd <- sd(x)
	n <- length(x)
	se <- sd/sqrt(n)
	CI <- qnorm(P)*se # assume normal dist
	CI.vec <- c((mean-CI), mean, (mean+CI))
	CI.vec
}

### Or from a t-distribution

CI95t <- function(x, p=95){
	P <- p/100 + (1-(p/100))/2
	mean <- mean(x)
	sd <- sd(x)
	n <- length(x)
	se <- sd/sqrt(n)
	CI <- qt(P, df=n-1)*se # assume t-dist
	CI.vec <- c((mean-CI), mean, (mean+CI))
	CI.vec
}

### length(x)= 3; written c(AA, Aa, aa)
freqz <- function(x) {
	A = (2*(x[1]) + x[2]) / (sum(x)*2)
	a = (2*(x[3]) + x[2]) / (sum(x)*2)
	X = c(A,a); X
}

MatSum <- function(A){
	rowsum <- cbind(A, rowSums(A))
	B <- rbind(rowsum, colSums(rowsum))
	B
}

NormFun <- function(n=1000, mu=400, sdv=25, seed=666) {
	set.seed(seed)
	nD <- rnorm(n, mu, sdv)
	hist(nD, col='gray88', main='', xlab=''); par(new=T)
	plot(min(nD):max(nD), dnorm(min(nD):max(nD), mean(nD), sd(nD)), 'l', col='navy', lwd= 2, axes= F, 	xlab='', ylab='', bty= 'n')
}

#################################
### See separate file for ODEs
#################################

##################
##################
##################
##################
##################
##### Basic ######
##################
### Statistics ###
##################
##################
##################
plot(mydata$dbh, mydata$Heartwood, pch= 19, col='black', main='Heartwood Area vs. DBH', ylab='Heartwood area (cm^2)', xlab='Diameter @ breast height')

#######################
### The Linear Model
#######################
fit <- lm(Heartwood ~ dbh, data = mydata)
summary(fit)
names(fit)
coef(fit)
residuals(fit)
abline(fit, col='red', lty = 4, lwd = 2)
legend('topleft', legend=c('lm(fit)'), col= 'red', lty= 4, bg= 'gray95')

#####################
# Correlation tests
#####################
cor.test(~ Heartwood + dbh, data = mydata)    # Parametric 'Pearson' correlation
cor.test(~ Heartwood + dbh, data = mydata, method = 'spearman') # Non-parametric alternative

########################
# Parametric comparison
# of 2 treatment means
# t-test on SapDepth
########################
##############################
### First we must check ######
### that variances are equal #
##############################
var.test(SapDepth ~ Temp, data = mydata) ### Temp is created below!!!!!!!!!
### In this case the answer is YES!

##############################
### Next we must check #######
### that fits Normal dist ####
### Kolmogorov-Smirnov test ##
##############################
ks.test(mydata$SapDepth, pnorm); hist(mydata$SapDepth)
### In this case the answer is NO! They are different

################################
### Aside: Do two samples come
### from the SAME distribution?
################################
normal = rnorm(50)
binomal = rbinom(50, 100, 0.33)
hist(normal); hist(binomal)
ks.test(normal, binomal)
### NOPE! ###

#############################
### Create a new factor (Temp)
### for comparisons & 
### add it to the data frame
###############################
Temp = rep(0, nrow(mydata))          # create new factor & use which() to compare in t-test
Temp[which(mydata$season == 'Fall' | mydata$season == 'Winter')] = 'COOL'
Temp[which(mydata$season == 'Spring' | mydata$season == 'Summer')] = 'WARM'
mydata = cbind(mydata, Temp)              # add factor to mydata

### var.equal = T/F
### paired = T/F
t.test(SapDepth ~ Temp, data = mydata, var.equal = F, paired = F)

###############################
# Non-parametric comparisons
# of 2 treatment means
# Mann-Whitney U & Wilcoxon U
###############################
### package for computing exact p-values in case of rank ties
### Be careful, exact tests do not calculate conf.int accurately
require(exactRankTests)
### Mann-Whitney test is Wilcoxon test with paired = F
wilcox.test(SapDepth ~ Temp, data= mydata, paired= F)
wilcox.exact(SapDepth ~ Temp, data= mydata, paired= F)

### Wilcoson U test (for paired data; with paired = T)
wilcox.test(SapDepth ~ Temp, data= mydata, paired= T)
wilcox.exact(SapDepth ~ Temp, data= mydata, paired= T)

############################
### Parametric comparisons
### of > 2 treatment means
########## ANOVA ###########
############################
# Conduct ANOVA on SapArea by spp. (4 treatments)
##############################
### First we must check ######
### that variances are equal #
### just like in t-test ######
### Bartlett's or Fligner- ###
### Killeen test #############
##############################
bartlett.test(SapArea ~ spp, data= mydata)
fligner.test(SapArea ~ spp, data= mydata)
### bartlett = NOT equal; Fligner-Killeen test = equal (go with F-K) 

#####################
### Now the model ###
#####################
Sap.model = aov(SapArea ~ spp, data = mydata)
summary(Sap.model) # no differences
names(Sap.model)
effects(Sap.model)
### Take a look and see
boxplot(SapArea ~ spp, data= mydata, ylab= 'SapArea', col= 'darkred')

#####################
# Non-parameteric rank
# Kruskal-Wallis test
# for > 2 means
#####################
# K-W test of Sapwood Area by spp.
kruskal.test(SapArea ~ spp, data = mydata)
### still no differences but it's close with a rank test

#################
#####################################
#### Making Barplot with Error Bars
#####################################
##################
boxplot(SapDepth ~ spp, data= mydata, ylab= 'Sapwood Depth', col='gray50')
#### We'll do SapDepth and look for differences among spp cause it looks promising from Fig.1

### First the ANOVA as above but with SapDepth this time
fligner.test(SapDepth ~ spp, data= mydata) ### Good, they're equal
aov.spp = aov(SapDepth ~ spp, data= mydata)
summary(aov.spp) ### Differences!

###################################
### Function for doing error bars
###################################
BoxplotBars <- function(x, bar, groups, bcol="gray88", lcol=1, axis=1, type=1, w=50){
	y <- barplot(x, ylim= c(0, (max(x) + axis*max(bar))), names = groups, ylab = deparse(substitute(x)), col= bcol)
	g = (max(y) - min(y)) / w               # width of the hats & bases of bars
	for (i in 1:length(y)){
		lines(c(y[i], y[i]), c(x[i]+bar[i], x[i]-bar[i]), col= lcol, lty= type) # error bar
		lines(c(y[i]-g, y[i]+g), c(x[i]+bar[i], x[i]+bar[i]), col= lcol, lty= type) # top bar
		lines(c(y[i]-g, y[i]+g), c(x[i]-bar[i], x[i]-bar[i]), col= lcol, lty= type) # bottom bar
		} }

###############################
### Produce necessary #########
### arguments for error.bars ##
### function above ############
###############################
Sapwood.Depth.m = tapply(mydata$SapDepth, mydata$spp, mean) ### get means by spp (same as above; Plotting)
Sapwood.Depth.sd = tapply(mydata$SapDepth, mydata$spp, sd)  ### sd for error terms
N = as.numeric(table(mydata$spp))
sem = Sapwood.Depth.sd/sqrt(N)
ci = (Sapwood.Depth.m + (sem * 1.96)) - (Sapwood.Depth.m - (sem * 1.96))
labels = levels(mydata$spp)

### Make the Barplot with Error Bars using function
BoxplotBars(Sapwood.Depth.m, ci, labels, lcol=2, axis=1.5, type=1) ### with CI95s
BoxplotBars(Sapwood.Depth.m, sem, labels, lcol=2, axis=1.5, type=1) ### with SEMs
### play with the function's argument to change the appearance of the bars

####################################
### Use boxplot2() to do the same
####################################
require(gplots)
lower.ci <- Sapwood.Depth.m - (1.96 * sem)
upper.ci <- Sapwood.Depth.m + (1.96 * sem)
barplot2(Sapwood.Depth.m, ci.l= lower.ci, ci.u= upper.ci, plot.ci= TRUE, col='slateblue', ci.lty=4)




#############################################
#### Used for creating Pop6C.csv ############
#############################################
s1 = 0.97; s2 = 0.08; s3 = 0.93; s4 = 0.06; s5 = 0.99
FA = 0.14; FJ = FA*0.66       
B = 0.015
F.sel = 0.12
S.sel = 0.9
I.pop = c(20,14,9,25,15,19); Gen = 50
PM <- matrix(c(s1*(1-B), FJ, FA, 0, FJ*F.sel, FA*F.sel, 
              s2*(1-B), s3*(1-B), 0, 0, 0, 0, 
              0, s4*(1-B), s5*(1-B), 0, 0, 0,
              s1*B, 0, 0, s1*S.sel, 0, 0,
              s2*B, s3*B, 0, s2*S.sel, s3*S.sel, 0,
              0, s4*B, s5*B, 0, s4*S.sel, s5*S.sel), 6, 6, byrow=T); PM

pop <- pop.projection(PM, I.pop, 20); pop

